# Libraries ----
library(tidyverse)
library(readxl)
library(ggforce)
library(knitr)
library(forcats)

# Add a numeric code for the ID label: "Great" -> 1, "Decent" -> 2, "Bad" -> 3.
# Any other ID value (including NA) yields NA. match() returns integer
# positions, so coerce to double to keep the column type numeric.
pitcher_test <- mutate(
  pitchers,
  group_indicator = as.numeric(match(ID, c("Great", "Decent", "Bad")))
)

# Correlation of release spin rate with run expectancy added, computed only
# on rows where both values are present
pitcher_test %>%
  drop_na(release_spin_rate, run_exp_added) %>%
  {
    cor(.$release_spin_rate, .$run_exp_added)
  }
## [1] 0.01304765
# Correlation of pitch speed with run expectancy added, computed only on
# rows where both values are present
pitcher_test %>%
  drop_na(pitch_speed, run_exp_added) %>%
  {
    cor(.$pitch_speed, .$run_exp_added)
  }
## [1] -0.008914642
# Does batted-ball type predict run expectancy? Fit a linear model with
# bb_type as the only predictor and print the coefficients.
lm(
  formula = run_exp_added ~ bb_type,
  data = pitcher_test
)
## 
## Call:
## lm(formula = run_exp_added ~ bb_type, data = pitcher_test)
## 
## Coefficients:
##        (Intercept)  bb_typeground_ball   bb_typeline_drive        bb_typepopup  
##            -0.1025              0.1706             -0.1393              0.3284
# Intercept = fly_ball, the baseline bb_type level; the other coefficients are
# differences from fly balls

# Pitchers held out of model fitting and used as the test set. Defined once
# so the training and test filters cannot drift apart.
holdout_pitchers <- c("Scherzer, Max", "Taillon, Jameson", "Berríos, José")

# Training fit: run expectancy on batted-ball type, using every pitcher
# except the held-out ones.
test_model <- pitcher_test %>%
  filter(!(player_name %in% holdout_pitchers)) %>%
  lm(run_exp_added ~ bb_type, data = .)

# Test set: held-out pitchers' rows that have a batted-ball type, reduced to
# the columns needed to compare actual and predicted run expectancy.
test_testdata <- pitcher_test %>%
  filter(
    player_name %in% holdout_pitchers,
    !is.na(bb_type)
  ) %>%
  select(ID, pitch_type, run_exp_added, bb_type)

# Show the test rows alongside the model's predictions.
test_testdata %>%
  mutate(preds = predict(test_model, newdata = test_testdata))
## # A tibble: 1,442 × 5
##    ID    pitch_type run_exp_added bb_type       preds
##    <chr> <chr>              <dbl> <chr>         <dbl>
##  1 Great SL                 0.207 ground_ball  0.0784
##  2 Great CU                 0.221 ground_ball  0.0784
##  3 Great CH                -1.03  fly_ball    -0.111 
##  4 Great FF                 0.181 fly_ball    -0.111 
##  5 Great FF                -1.66  fly_ball    -0.111 
##  6 Great SL                -0.27  fly_ball    -0.111 
##  7 Great FF                 0.206 ground_ball  0.0784
##  8 Great SL                -0.183 ground_ball  0.0784
##  9 Great SL                 0.406 ground_ball  0.0784
## 10 Great FC                -0.752 ground_ball  0.0784
## # ℹ 1,432 more rows
# row.names = FALSE
# For arsenal rows with pitch_type "SL" and pitch_hand "L", correlate the
# combined break magnitude (Euclidean norm of the x and z break) with rv100
arsenal %>%
  filter(pitch_type == "SL" & pitch_hand == "L") %>%
  mutate(total_break = sqrt(pitcher_break_x^2 + pitcher_break_z^2)) %>%
  with(cor(total_break, rv100))
## [1] -0.01633807
# Rows for "Scherzer, Max" that have a batted-ball type, keeping only the
# label, pitch type, run expectancy and batted-ball type columns
pitchers %>%
  filter(player_name == "Scherzer, Max") %>%
  drop_na(bb_type) %>%
  select(ID, pitch_type, run_exp_added, bb_type)
## # A tibble: 357 × 4
##    ID    pitch_type run_exp_added bb_type    
##    <chr> <chr>              <dbl> <chr>      
##  1 Great SL                 0.207 ground_ball
##  2 Great CU                 0.221 ground_ball
##  3 Great CH                -1.03  fly_ball   
##  4 Great FF                 0.181 fly_ball   
##  5 Great FF                -1.66  fly_ball   
##  6 Great SL                -0.27  fly_ball   
##  7 Great FF                 0.206 ground_ball
##  8 Great SL                -0.183 ground_ball
##  9 Great SL                 0.406 ground_ball
## 10 Great FC                -0.752 ground_ball
## # ℹ 347 more rows
# Among rows with pitch_type "SL", correlate pfx_x with release spin rate,
# using only rows where both values are present
pitchers %>%
  filter(pitch_type == "SL") %>%
  drop_na(pfx_x, release_spin_rate) %>%
  {
    cor(.$pfx_x, .$release_spin_rate)
  }
## [1] 0.1578424
# Weighted average characteristics per pitch name in `arsenal`.
# Every average uses the same row weight, pitch_usage * pitches, so it is
# computed once as a helper column (summarize() drops it again).
avgs <- arsenal %>%
  mutate(avg_weight = pitch_usage * pitches) %>%
  group_by(pitch_name) %>%
  summarize(
    Speed = round(weighted.mean(pitch_speed, avg_weight), 1),
    `Spin Rate` = round(weighted.mean(spin_rate, avg_weight), 0),
    `H. Break (in.)` = round(weighted.mean(pitcher_break_x, avg_weight), 1),
    `V. Break (in.)` = round(weighted.mean(pitcher_break_z, avg_weight), 1),
    wOBA = round(weighted.mean(wOBA, avg_weight), 3),
    `Whiff Rate` = round(weighted.mean(whiff_percent, avg_weight), 1),
    `Hard Hit Rate` = round(weighted.mean(hard_hit_percent, avg_weight), 1)
  ) %>%
  as.data.frame() %>%
  # sprintf("%.1f%%") always prints one decimal ("44.0%"); paste0() on a
  # rounded number drops it for whole values ("44%"), giving ragged columns.
  mutate(
    `Whiff Rate` = sprintf("%.1f%%", `Whiff Rate`),
    `Hard Hit Rate` = sprintf("%.1f%%", `Hard Hit Rate`)
  ) %>%
  rename(Pitch = pitch_name)

# Render the summary as a table
avgs %>%
  kable()
## |Pitch     | Speed| Spin Rate| H. Break (in.)| V. Break (in.)|  wOBA|Whiff Rate |Hard Hit Rate |
## |:---------|-----:|---------:|--------------:|--------------:|-----:|:----------|:-------------|
## |4-Seamer  |  94.1|      2280|            7.4|           14.6| 0.340|22.1%      |44.0%         |
## |Changeup  |  85.0|      1778|           14.4|           32.3| 0.287|31.5%      |31.1%         |
## |Curveball |  79.7|      2548|            9.1|           52.9| 0.277|31.9%      |34.1%         |
## |Cutter    |  89.8|      2386|            3.1|           25.3| 0.323|23.9%      |35.9%         |
## |Sinker    |  93.5|      2133|           15.1|           23.6| 0.353|15.2%      |42.1%         |
## |Slider    |  85.1|      2423|            5.9|           36.1| 0.279|35.1%      |33.3%         |
## |Slurve    |  82.3|      2622|           15.7|           42.5| 0.262|27.3%      |33.8%         |
## |Splitter  |  87.2|      1427|           11.8|           32.3| 0.243|36.8%      |32.5%         |
## |Sweeper   |  82.1|      2626|           14.7|           39.5| 0.258|33.9%      |27.2%         |